from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from newspaper import Article

# Minimal browser-like User-Agent: some sites reject requests' default UA.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def fetch_news_list(page=1):
    """Fetch one page of the Sina finance rolling-news list.

    Parameters
    ----------
    page : int
        1-based page number of the list (default 1).

    Returns
    -------
    list[dict]
        One dict per news item with keys ``title``, ``url`` (made absolute
        against the list page URL), and ``time`` (listing timestamp text,
        or ``None`` when the item has no <span>).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    url = f"https://finance.sina.com.cn/roll/c/56592.shtml?page={page}"
    resp = requests.get(url, headers=HEADERS, timeout=10)
    # Fail loudly on HTTP errors instead of silently parsing an error page
    # into an empty result list.
    resp.raise_for_status()
    resp.encoding = "utf-8"
    soup = BeautifulSoup(resp.text, "html.parser")

    news_items = []
    for li in soup.select("#listcontent li"):
        a = li.find("a")
        span = li.find("span")
        # Skip list items without a usable link (separators, ads, etc.).
        if a and a.get("href"):
            news_items.append({
                "title": a.get_text(strip=True),
                # Resolve relative / protocol-relative hrefs so downstream
                # consumers (parse_article) always get an absolute URL.
                "url": urljoin(url, a["href"]),
                "time": span.get_text(strip=True) if span else None,
            })
    return news_items

def parse_article(url, max_chars=200):
    """Download and parse a single article with newspaper3k.

    Parameters
    ----------
    url : str
        Absolute URL of the article page.
    max_chars : int
        Maximum number of body characters to return (default 200,
        matching the previous hard-coded truncation).

    Returns
    -------
    dict
        On success: ``{"title", "publish_date", "text"}`` where ``text``
        is the body truncated to *max_chars* characters.
        On any failure: ``{"error": <message>}`` — callers are expected to
        check for the ``error`` key instead of catching exceptions.
    """
    try:
        article = Article(url, language="zh")
        article.download()
        article.parse()
        return {
            "title": article.title,
            "publish_date": article.publish_date,
            "text": article.text[:max_chars],  # truncated body snippet
        }
    except Exception as e:
        # Broad catch is deliberate: a single bad article (network error,
        # parse failure) must not abort the whole crawl loop.
        return {"error": str(e)}

def _print_detail(detail):
    """Print the parsed article fields, or the failure message on error."""
    if "error" in detail:
        print("正文解析失败：", detail["error"])
    else:
        print("正文标题：", detail["title"])
        print("正文片段：", detail["text"])


def main():
    """Fetch page 1 of the news roll and print details for the first 5 items."""
    news_list = fetch_news_list(page=1)
    print(f"抓取到 {len(news_list)} 条新闻链接")
    for item in news_list[:5]:  # parse only the first 5 to limit requests
        print("列表标题：", item["title"])
        print("列表时间：", item["time"])
        print("链接：", item["url"])
        _print_detail(parse_article(item["url"]))
        print("=" * 60)


if __name__ == "__main__":
    main()
